* ---------------------------------------------------------------------------
* Bootstrap: project root, shared configuration, and log file.
*
* Positional argument 1 is stored as the project root directory.
* The included config.do is expected to define the path globals used in this
* file (e.g. ${log_dir}) -- NOTE(review): confirm against config.do.
* ---------------------------------------------------------------------------
global root_dir = "`1'"

include "$root_dir/code/config/config.do"


* The path is quoted so that a ${log_dir} containing spaces does not break
* -log using-. -capture noisily- shows any error but lets the script continue.
capture noisily log using "${log_dir}/0_uspto_patents_extract.log", replace name(dat)

* Positional arguments 2-5 may carry the placeholder ___EMPTY___, which stands
* for "no value". Copy them into the globals arg1-arg4, mapping the placeholder
* to an empty string.
forvalues k = 1/4 {
    local pos = `k' + 1
    global arg`k' = cond("``pos''" == "___EMPTY___", "", "``pos''")
}

* Each non-empty argument sets its corresponding configuration global;
* empty arguments leave the global untouched.
if "${arg1}" != "" {
    global weight_category "${arg1}"
    display "Weight category: ${weight_category}"
}

if "${arg2}" != "" {
    global weight_versions "${arg2}"
    display "Weight versions: ${weight_versions}"
}

if "${arg3}" != "" {
    global weight_window "${arg3}"
    display "Weight window: ${weight_window}"
}

if "${arg4}" != "" {
    global wtype "${arg4}"
}
* wtype is echoed whether or not it was set from argument 5
display "${wtype}"

capture noi {

************************************************************
*	Does what the name suggests: extracts USPTO patents	   *
************************************************************
* Originally this file was written to be flexible and extract patents from any
* authority. It is now used only for USPTO patents; the authority is kept as a
* global in case someone needs a different one for another project.
*
* Steps:
*   1. load granted applications and attach the DOCDB family identifier
*   2. compute the earliest application year per family, then keep ${auth}
*   3. flag membership in each technology patent list and the machinery list
*   4. reduce to distinct family-level rows and add the biadic indicator
*   5. attach IPC codes and share_anyclassification
* Output: ${alm_data_proc}/${auth}_docdb_cipc.dta, sorted by docdb_family_id cipc6.
*
* All file paths (including tempfiles) are quoted so that directories
* containing spaces do not break the commands.
global auth "US"

* Load applications and keep only the granted ones
use appln_id appln_auth appln_year granted using "${commondata_dir}/patstat_2018b/appln_info.dta", clear
keep if granted == "Y"
drop granted

* Correct GER/GDR and RUS/SU, add our patent family identifier
do "${code_dir}/config/auth_map.do" appln_auth
mmerge appln_id using "${commondata_dir}/patstat_2018b/family_info.dta", unmatched(master) ukeep(docdb_family_id)
drop _merge

* Generate the earliest application year by patent family.
* This runs before the authority filter below, so the minimum is taken over
* every granted application of the family that is still in memory.
rename appln_year _appln_year
bysort docdb_family_id: egen appln_year = min(_appln_year)
drop _appln_year

* Keep only applications filed at the selected authority
keep if appln_auth == "${auth}"
drop appln_auth

* Load the different technologies: one 0/1 flag per patent list, equal to 1
* when the application is found in that list
foreach tech in auto90 auto95 pauto90 pauto95 pauto90_rm6 {
	mmerge appln_id using "${dataset_dir}/patent_list/`tech'_patents.dta", unmatched(master)
	gen `tech' = (_merge == 3)
	drop _merge
}

* Load machinery patents
mmerge appln_id using "${dataset_dir}/patent_list/pats_tfa.dta", unmatched(master)
gen in_relevant_field = (_merge == 3)
drop _merge

* The application identifier is no longer needed; remove exact duplicate rows
drop appln_id
duplicates drop

* Load biadic information
mmerge docdb_family_id using "${dataset_dir}/patstat_orbis/docdb_families2.dta", ukeep(biadic_D) unmatched(master)
rename biadic_D biadic
drop _merge

* Get IPC codes (only families matched on both sides are kept) and save an
* intermediate version; share_anyclassification is merged in further below
mmerge docdb_family_id using "${dataset_dir}/patstat_orbis/docdb_family_id_cipc_codes.dta", unmatched(none)
drop _merge
sort docdb_family_id cipc6
compress
save "${alm_data_proc}/${auth}_docdb_cipc.dta", replace

* Load the mapping to our aggregated ipc codes with more than 100 patent families
import delim using "${classification_dir}/V6/ipc6XX_mapping.csv", varnames(1) stringcols(1) clear
rename (ipc6 ipc6xx) (cipc6 cipc6xx)
tempfile ipc6xx_shareanyclass
save "`ipc6xx_shareanyclass'"

* Load share_anyclassification, keyed by the aggregated code, and expand it to
* the individual cipc6 codes through the mapping. The key is spelled out as
* cipc6xx: the previous -keep cipc6- / -mmerge cipc6- only resolved to this
* variable through variable-name abbreviation.
import delim using "${classification_dir}/V6/ipc6XX.csv", varnames(1) stringcols(1) clear
rename ipc6 cipc6xx
keep cipc6xx share_anyclassification
mmerge cipc6xx using "`ipc6xx_shareanyclass'", unmatched(master)
* Codes absent from the mapping get no cipc6 from the merge; use the code itself
replace cipc6 = cipc6xx if cipc6 == ""
tempfile ipc6_shareanyclass
save "`ipc6_shareanyclass'"

* Combine the any-classification share with the rest of the technologies
use "${alm_data_proc}/${auth}_docdb_cipc.dta", clear
mmerge cipc6 using "`ipc6_shareanyclass'", unmatched(master) ukeep(share_anyclassification)
drop _merge

sort docdb_family_id cipc6
compress
save "${alm_data_proc}/${auth}_docdb_cipc.dta", replace


}
* _rc holds the return code of the captured block that ends just above:
* zero means every command in it succeeded.
if _rc != 0 {
    display "Execution finished with errors."
}
else {
    display "Execution finished successfully."
}

* Close the named log; -capture- ignores the error if it is not open
capture log close dat
